#### FIGURES A.1-A.4: VIDEO PILOT MANIPULATION CHECKS
#### Accessibility, comprehension, and narrative framing analysis

# Start from an empty global workspace, then run the shared setup script
# (presumably the source of `data_path`, `plot_path`, and the attached
# packages used below -- confirm against 00_setup.R).
rm(list = ls())
source("./2_code/00_setup.R")

#### LOAD AND PREPARE DATA ####

# Read the pilot export, discard its first two rows, and keep only the rows
# whose `treatment` differs from "control".
data <- fread(paste0(data_path, "pilot_videos.csv"))
data <- data[-(1:2)][treatment != "control"]


#### HELPER FUNCTIONS ####

# Detect whether a free-text answer names Venezuela.
#
# Matching is case-insensitive, accent-insensitive, and based on literal
# substring search, so the listed misspellings, the demonym "venezolano",
# and the listed place names all count as a mention.
#
# Args:
#   response: A single value holding one survey answer; may be NA or "".
#
# Returns:
#   TRUE if any pattern occurs in the answer; FALSE otherwise, including for
#   NA and empty answers.
identify_venezuela <- function(response) {
  if (is.na(response) || response == "") {
    return(FALSE)
  }

  # Fold acute-accented vowels to plain ASCII before lower-casing. The
  # patterns below are written without accents, so an accented spelling of
  # e.g. "bolivar" would otherwise never match.
  response_folded <- tolower(chartr(
    "\u00e1\u00e9\u00ed\u00f3\u00fa\u00c1\u00c9\u00cd\u00d3\u00da",
    "aeiouAEIOU",
    response
  ))

  venezuela_patterns <- c(
    "venezuela", "venezuel", "venenzuela", "venesuela", "venezolano",
    "caracas", "carabobo", "maracaibo", "barquisimeto", "bolivar"
  )

  # vapply() guarantees one logical per pattern regardless of input.
  any(vapply(
    venezuela_patterns,
    function(pattern) grepl(pattern, response_folded, fixed = TRUE),
    logical(1)
  ))
}

# Assign an open-ended answer about migration reasons to zero or more
# thematic categories.
#
# Each category is defined by a regular expression of unaccented Spanish
# keyword stems. The answer is accent-folded and lower-cased before
# matching, so accented spellings match the same stems as their unaccented
# forms.
#
# Args:
#   response: A single value holding one survey answer; may be NA or "".
#
# Returns:
#   An unnamed list of category labels (character scalars) in the fixed
#   order Economic, Quality of Life, Family, Political, Security,
#   Geographic, Basic Needs. An empty list for NA, empty, or unmatched
#   answers.
categorize_response <- function(response) {
  if (is.na(response) || response == "") {
    return(list())
  }

  # Fold acute-accented vowels to plain ASCII; the keyword stems below carry
  # no accents, so unfolded text would silently miss them.
  response_folded <- tolower(chartr(
    "\u00e1\u00e9\u00ed\u00f3\u00fa\u00c1\u00c9\u00cd\u00d3\u00da",
    "aeiouAEIOU",
    response
  ))

  # Category label -> keyword regex; the order here fixes the output order.
  category_patterns <- c(
    "Economic" = "econom|trabajo|empleo|oportunidad|pobreza|dinero|inflacion|crisis|sobreviv",
    "Quality of Life" = "calidad de vida|mejorar|mejor|vida|futuro|progres",
    "Family" = "familia|familiar|hijos|ayudar",
    "Political" = "politic|gobierno|gobernante|dictadura|regimen|maduro|chavez",
    "Security" = "violencia|inseguridad|seguridad|muerte|guerra",
    "Geographic" = "cerca|cercano|frontera|proximidad",
    "Basic Needs" = "comida|aliment|medicament|medicina|salud|servicio|suministro"
  )

  matched <- vapply(
    category_patterns,
    function(pattern) grepl(pattern, response_folded),
    logical(1),
    USE.NAMES = FALSE
  )

  # Callers test membership with `%in%` on a list, so keep the list shape.
  as.list(names(category_patterns)[matched])
}

# Detect state-dependency / public-cost language in a free-text answer.
#
# The answer is accent-folded and lower-cased, then searched for each
# keyword as a literal substring. The keywords are written without accents,
# so folding is what lets accented spellings match.
#
# Args:
#   response: A single value holding one survey answer; may be NA or "".
#
# Returns:
#   TRUE if at least one keyword occurs in the answer; FALSE otherwise,
#   including for NA and empty answers.
identify_state_dependency <- function(response) {
  if (is.na(response) || response == "") {
    return(FALSE)
  }

  # Fold acute-accented vowels to plain ASCII before lower-casing.
  response_folded <- tolower(chartr(
    "\u00e1\u00e9\u00ed\u00f3\u00fa\u00c1\u00c9\u00cd\u00d3\u00da",
    "aeiouAEIOU",
    response
  ))

  dependency_patterns <- c(
    "estado", "gobierno", "public", "servicio", "hospital", "clinica",
    "medicament", "medicina", "tratamiento", "atencion", "salud",
    "ayuda", "apoyo", "asistencia", "beneficio", "subsidio",
    "gratuito", "gratis", "costo", "pagar", "dinero para",
    "necesita", "depende", "requiere", "recursos del estado"
  )

  # The keywords contain no regex metacharacters, so literal matching is
  # equivalent to regex matching here.
  any(vapply(
    dependency_patterns,
    function(pattern) grepl(pattern, response_folded, fixed = TRUE),
    logical(1)
  ))
}

# Detect labor-market / economic-contribution language in a free-text
# answer.
#
# The answer is accent-folded and lower-cased, then searched for each
# keyword as a literal substring. The keywords are written without accents,
# so folding is what lets accented spellings match.
#
# Args:
#   response: A single value holding one survey answer; may be NA or "".
#
# Returns:
#   TRUE if at least one keyword occurs in the answer; FALSE otherwise,
#   including for NA and empty answers.
identify_economic_contribution <- function(response) {
  if (is.na(response) || response == "") {
    return(FALSE)
  }

  # Fold acute-accented vowels to plain ASCII before lower-casing.
  response_folded <- tolower(chartr(
    "\u00e1\u00e9\u00ed\u00f3\u00fa\u00c1\u00c9\u00cd\u00d3\u00da",
    "aeiouAEIOU",
    response
  ))

  contribution_patterns <- c(
    "trabajar", "laboral", "emplearse",
    "buscar trabajo", "conseguir trabajo",
    "producir", "produccion", "aportar", "contribuir", "generar",
    "economia", "economico", "ingresos", "ganar", "salario",
    "mejorar", "progreso", "desarrollo", "crecimiento",
    "campo", "agricultura", "industria", "comercio", "empresa"
  )

  # The keywords contain no regex metacharacters, so literal matching is
  # equivalent to regex matching here.
  any(vapply(
    contribution_patterns,
    function(pattern) grepl(pattern, response_folded, fixed = TRUE),
    logical(1)
  ))
}


#### FIGURE A.1: VIDEO ACCESSIBILITY AND COMPREHENSION ####

# Panel (a): Video accessibility
# For each accessibility item, count the "Sí" answers and the non-missing
# answers, then express the former as a share of the latter.
video_summary <- data %>%
  summarise(
    ver_video_yes = sum(ver_video == "Sí", na.rm = TRUE),
    ver_video_total = sum(!is.na(ver_video)),
    escuchar_video_yes = sum(escuchar_video == "Sí", na.rm = TRUE),
    escuchar_video_total = sum(!is.na(escuchar_video))
  ) %>%
  mutate(
    ver_video_prop = ver_video_yes / ver_video_total,
    escuchar_video_prop = escuchar_video_yes / escuchar_video_total
  )

# One row per bar: share, numerator, and denominator for the bar label.
plot_data_access <- data.frame(
  Condition = c("Could Listen to Audio", "Could Watch Video"),
  Proportion = c(video_summary$escuchar_video_prop, video_summary$ver_video_prop),
  Count_Yes = c(video_summary$escuchar_video_yes, video_summary$ver_video_yes),
  Count_Total = c(video_summary$escuchar_video_total, video_summary$ver_video_total)
)

p_access <- ggplot(plot_data_access, aes(x = Condition, y = Proportion)) +
  geom_bar(stat = "identity", fill = "steelblue", alpha = 0.7, width = 0.6) +
  geom_text(aes(label = paste0(round(Proportion * 100, 1), "%\n(", Count_Yes, "/", Count_Total, ")")),
            vjust = -0.5, size = 3.5) +
  # Upper limit of 1.1 leaves headroom for the labels above a 100% bar.
  scale_y_continuous(limits = c(0, 1.1), breaks = seq(0, 1, 0.2),
                     labels = scales::percent_format()) +
  # The sample size in the caption is taken from the data rather than typed
  # in, so it cannot drift from the rows actually analysed.
  labs(x = "", y = "Proportion of Participants",
       caption = paste0("N = ", nrow(data), " participants")) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 10),
    axis.title.y = element_text(size = 11),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank()
  )

ggsave(filename = paste0(plot_path, "figure_A1a.png"), plot = p_access, width = 6, height = 4, dpi = 600)

# Panel (b): Venezuela identification
# Classify each `pais_video` answer. vapply() pins the result to one logical
# per row (sapply() would return a list for zero rows) and does not attach
# the answers as element names.
data$venezuela_identified <- vapply(
  data$pais_video,
  identify_venezuela,
  logical(1),
  USE.NAMES = FALSE
)

venezuela_summary <- data %>%
  summarise(
    total_responses = n(),
    venezuela_yes = sum(venezuela_identified, na.rm = TRUE),
    venezuela_no = sum(!venezuela_identified, na.rm = TRUE),
    venezuela_prop = venezuela_yes / total_responses
  )

plot_data_venezuela <- data.frame(
  Response = c("Did Not Identify Venezuela", "Identified Venezuela"),
  Count = c(venezuela_summary$venezuela_no, venezuela_summary$venezuela_yes),
  Proportion = c(1 - venezuela_summary$venezuela_prop, venezuela_summary$venezuela_prop)
)

# Fix the bar order explicitly instead of relying on alphabetical sorting.
plot_data_venezuela$Response <- factor(plot_data_venezuela$Response, 
                                       levels = c("Did Not Identify Venezuela", "Identified Venezuela"))

p_venezuela <- ggplot(plot_data_venezuela, aes(x = Response, y = Proportion)) +
  geom_bar(stat = "identity", fill = "steelblue", alpha = 0.7, width = 0.6) +
  geom_text(aes(label = paste0(round(Proportion * 100, 1), "%\n(n = ", Count, ")")),
            vjust = -0.5, size = 3.5) +
  scale_y_continuous(limits = c(0, 1.1), breaks = seq(0, 1, 0.2),
                     labels = scales::percent_format()) +
  # The caption reports the same row count used as the denominator above,
  # so the two cannot disagree.
  labs(x = "", y = "Proportion of Participants",
       caption = paste0("N = ", venezuela_summary$total_responses,
                        " participants; includes variations and misspellings of 'Venezuela'")) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 10),
    axis.title.y = element_text(size = 11),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank()
  )

ggsave(filename = paste0(plot_path, "figure_A1b.png"), plot = p_venezuela, width = 6, height = 4, dpi = 600)


#### FIGURE A.2: REASONS FOR VENEZUELAN MIGRATION (OVERALL) ####

# Denominator for the shares and the N reported in the caption. Taken from
# the data so that both stay correct if the sample changes.
n_respondents <- nrow(data)

# Flag, row by row, which reason categories the open-ended
# `video_manipulation` answer mentions, then count rows per category.
category_summary <- data %>%
  rowwise() %>%
  mutate(
    categories = list(categorize_response(video_manipulation)),
    Economic = "Economic" %in% categories,
    `Quality of Life` = "Quality of Life" %in% categories,
    Family = "Family" %in% categories,
    Political = "Political" %in% categories,
    Security = "Security" %in% categories,
    Geographic = "Geographic" %in% categories,
    `Basic Needs` = "Basic Needs" %in% categories
  ) %>%
  ungroup() %>%
  summarise(
    across(c(Economic, `Quality of Life`, Family, Political, Security, Geographic, `Basic Needs`),
           ~ sum(.x, na.rm = TRUE)),
    .groups = 'drop'
  )

plot_data_reasons <- data.frame(
  Reason = c("Economic", "Quality of Life", "Political", "Basic Needs",
             "Family", "Geographic", "Security"),
  Count = c(category_summary$Economic, category_summary$`Quality of Life`,
            category_summary$Political, category_summary$`Basic Needs`,
            category_summary$Family, category_summary$Geographic,
            category_summary$Security),
  stringsAsFactors = FALSE
) %>%
  mutate(
    Proportion = Count / n_respondents,
    # Order the bars from most to least frequently mentioned category.
    Reason = factor(Reason, levels = Reason[order(-Count)])
  )

figure_A2 <- ggplot(plot_data_reasons, aes(x = Reason, y = Proportion)) +
  geom_bar(stat = "identity", fill = "steelblue", alpha = 0.7, width = 0.7) +
  geom_text(aes(label = paste0(round(Proportion * 100, 1), "%\n(n = ", Count, ")")),
            vjust = -0.3, size = 3.2) +
  # 15% headroom above the tallest bar keeps its label inside the panel.
  scale_y_continuous(limits = c(0, max(plot_data_reasons$Proportion) * 1.15),
                     labels = scales::percent_format()) +
  labs(x = "Migration Reason Categories", y = "Proportion of Participants",
       caption = paste0("N = ", n_respondents,
                        " participants; responses could mention multiple categories")) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 10),
    axis.title.x = element_text(size = 11),
    axis.title.y = element_text(size = 11),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank()
  )

figure_A2

ggsave(filename = paste0(plot_path, "figure_A2.png"), plot = figure_A2, width = 10, height = 7, dpi = 600)


#### FIGURE A.3: MIGRATION REASONS BY TREATMENT NARRATIVE ####

# Collapse the video arms into two narratives; rows whose `treatment` is
# none of the three listed values get NA and are dropped. `data` is
# overwritten, so every later section sees only the retained rows.
data <- data %>%
  mutate(
    treatment_collapsed = case_when(
      treatment == "video_humanitarian" ~ "Humanitarian Narrative",
      treatment %in% c("video_exceptional", "video_typical") ~ "Economic Narrative",
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(treatment_collapsed))

# Row-wise category flags derived from the open-ended answer.
data_categorized <- data %>%
  rowwise() %>%
  mutate(
    categories = list(categorize_response(video_manipulation)),
    Economic = "Economic" %in% categories,
    `Quality of Life` = "Quality of Life" %in% categories,
    Family = "Family" %in% categories,
    Political = "Political" %in% categories,
    Security = "Security" %in% categories,
    Geographic = "Geographic" %in% categories,
    `Basic Needs` = "Basic Needs" %in% categories
  ) %>%
  ungroup()

# Per narrative: group size, mentions per category, and the mentions as a
# share of the group size (stored in `<category>_prop` columns).
treatment_summary <- data_categorized %>%
  group_by(treatment_collapsed) %>%
  summarise(
    n = n(),
    across(
      c(Economic, `Quality of Life`, Family, Political, Security, Geographic, `Basic Needs`),
      ~ sum(.x, na.rm = TRUE)
    ),
    .groups = "drop"
  ) %>%
  mutate(
    across(
      c(Economic, `Quality of Life`, Family, Political, Security, Geographic, `Basic Needs`),
      ~ .x / n,
      .names = "{.col}_prop"
    )
  )

# Long format: one row per narrative x category, with a fixed category order
# for the x-axis.
plot_data_treatment <- treatment_summary %>%
  select(treatment_collapsed, n, ends_with("_prop")) %>%
  pivot_longer(
    cols = ends_with("_prop"),
    names_to = "Category",
    values_to = "Proportion"
  ) %>%
  mutate(
    Category = str_remove(Category, "_prop"),
    Category = factor(
      Category,
      levels = c(
        "Economic", "Quality of Life", "Political", "Basic Needs",
        "Family", "Geographic", "Security"
      )
    )
  )

figure_A3 <- ggplot(
  plot_data_treatment,
  aes(x = Category, y = Proportion, fill = treatment_collapsed)
) +
  geom_bar(stat = "identity", position = "dodge", alpha = 0.8, width = 0.7) +
  # Dodge the labels by the bar width so each sits over its own bar.
  geom_text(
    aes(label = paste0(round(Proportion * 100, 1), "%")),
    position = position_dodge(width = 0.7),
    vjust = -0.5,
    size = 3
  ) +
  scale_fill_manual(
    values = c(
      "Humanitarian Narrative" = "coral",
      "Economic Narrative" = "steelblue"
    )
  ) +
  scale_y_continuous(
    limits = c(0, max(plot_data_treatment$Proportion) * 1.15),
    labels = scales::percent_format()
  ) +
  labs(
    x = "Migration Reason Categories",
    y = "Proportion of Participants",
    fill = "Treatment Condition",
    caption = "Note: Responses could mention multiple categories"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 10),
    axis.title.x = element_text(size = 11),
    axis.title.y = element_text(size = 11),
    legend.position = "bottom",
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank()
  )

figure_A3

ggsave(
  filename = paste0(plot_path, "figure_A3.png"),
  plot = figure_A3,
  width = 12,
  height = 8,
  dpi = 600
)


#### FIGURE A.4: STATE DEPENDENCY VS. ECONOMIC CONTRIBUTION ####

# Flag, row by row, whether the open-ended answer uses state-dependency or
# economic-contribution language. The helpers take one answer at a time,
# hence rowwise().
data_classified <- data %>%
  rowwise() %>%
  mutate(
    state_dependency = identify_state_dependency(video_manipulation),
    economic_contribution = identify_economic_contribution(video_manipulation)
  ) %>%
  ungroup()

# Per narrative: group size, flagged answers per theme, and their share of
# the group size.
theme_summary <- data_classified %>%
  group_by(treatment_collapsed) %>%
  summarise(
    n = n(),
    state_dependency_count = sum(state_dependency, na.rm = TRUE),
    economic_contribution_count = sum(economic_contribution, na.rm = TRUE),
    .groups = 'drop'
  ) %>%
  mutate(
    state_dependency_prop = state_dependency_count / n,
    economic_contribution_prop = economic_contribution_count / n
  )

plot_data_themes <- theme_summary %>%
  select(treatment_collapsed, state_dependency_prop, economic_contribution_prop) %>%
  pivot_longer(
    cols = c(state_dependency_prop, economic_contribution_prop),
    names_to = "Theme",
    values_to = "Proportion"
  ) %>%
  mutate(
    Theme = case_when(
      Theme == "state_dependency_prop" ~ "State Dependency/\nPublic Costs",
      Theme == "economic_contribution_prop" ~ "Economic Contribution/\nLabor Market"
    )
  )

figure_A4 <- ggplot(plot_data_themes, aes(x = Theme, y = Proportion, fill = treatment_collapsed)) +
  geom_bar(stat = "identity", position = "dodge", alpha = 0.8, width = 0.6) +
  geom_text(aes(label = paste0(round(Proportion * 100, 1), "%")),
            position = position_dodge(width = 0.6), vjust = -0.5, size = 3.5) +
  scale_fill_manual(values = c("Humanitarian Narrative" = "coral",
                               "Economic Narrative" = "steelblue")) +
  scale_y_continuous(limits = c(0, max(plot_data_themes$Proportion) * 1.15),
                     labels = scales::percent_format()) +
  # N is the sum of the group sizes actually plotted, not a typed-in number,
  # so it reflects any rows dropped before this section.
  labs(caption = paste0("N = ", sum(theme_summary$n),
                        " participants; responses could mention multiple themes"),
       x = "Thematic Framework", y = "Proportion of Participants",
       fill = "Treatment Condition") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 10),
    legend.position = "bottom",
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank()
  )

figure_A4

ggsave(filename = paste0(plot_path, "figure_A4.png"), plot = figure_A4, width = 8, height = 6, dpi = 600)
